#!/usr/bin/env python3

from __future__ import print_function

from httpobs.conf import API_URL

import grequests
import os
import requests
import sys
import time

# Upper bound on scans allowed in the queue at once; a smaller cap is used
# whenever the HTTPOBS_DEV environment variable is present.
# TODO: use httpobs.conf
MAX_QUEUE = 64 if 'HTTPOBS_DEV' in os.environ else 256

if __name__ == '__main__':
    # Mass-scan driver: reads hosts from the file named on the command line
    # and feeds them to the scanner API in batches sized to the free queue
    # capacity, polling until every host is submitted and nothing is pending.
    if len(sys.argv) < 2:
        print('Usage: ' + sys.argv[0] + ' <file>')
        sys.exit(1)

    start_time = time.time()
    total_scanned = 0

    s = requests.Session()

    # Each line is either a bare hostname or a CSV row whose second column
    # is the hostname (e.g. "1,example.com"). Blank entries are dropped so
    # that no request is ever made with an empty host.
    try:
        with open(sys.argv[1], 'r') as alexafp:
            hosts = []
            for line in alexafp:
                line = line.strip()
                host = line.split(',')[1] if ',' in line else line
                if host:
                    hosts.append(host)
    except (IOError, OSError, UnicodeError):
        print('Cannot open ' + sys.argv[1])
        sys.exit(1)

    while True:
        loop_time = time.time()

        # Get the queue availability. Any failure (network error, non-JSON
        # body) is treated as transient and retried; Exception rather than a
        # bare except so that Ctrl-C and sys.exit() still stop the script.
        try:
            r = s.get(API_URL + '/getScannerStates').json()
        except Exception:
            time.sleep(5)
            continue

        available = MAX_QUEUE - r.get('PENDING', 0) - r.get('RUNNING', 0) - r.get('STARTING', 0)

        print('Queue availability: {queue_avail}. Total scanned: {total_scanned}. Pending: {pending}.'.format(
            queue_avail=available, total_scanned=total_scanned, pending=r.get('PENDING', 0)))

        # Quit once every host has been submitted and the scanner reports
        # that nothing is pending
        if not hosts and r.get('PENDING', 0) == 0:
            break

        if available > 0 and hosts:
            # The slice may be shorter than `available` near the end of the list
            targets = hosts[:available]
            urls = [API_URL + '/analyze?host=' + host for host in targets]

            # Start up a new mass scan
            try:
                rs = (grequests.post(u) for u in urls)
                grequests.map(rs)
            except Exception:
                time.sleep(5)
                raise

            # Count only the hosts actually submitted, not the free slots
            total_scanned += len(targets)
            hosts = hosts[len(targets):]

        # If the previous loop completed quickly, cool down for a moment
        if time.time() - loop_time < 5:
            time.sleep(5)

    total_time = int(time.time() - start_time)
    print('Elapsed time: {elapsed_time}s'.format(elapsed_time=total_time))
    # A run that finishes in under a second (e.g. an empty host list) has
    # total_time == 0; report a rate of 0 instead of dividing by zero.
    speed = total_scanned / total_time if total_time > 0 else 0
    print('Scans/sec: {speed}'.format(speed=speed))
